********************************************************************************************
*** Alexandra O. Zeitz & David A. Leblang
*** Migrants as Engines of Financial Globalization: The Case of Global Banking
*** Last updated: October 5, 2020
********************************************************************************************

**********************************************
*** 0. PACKAGES
** The analysis requires up-to-date versions of the following packages.
** ftools and reghdfe are installed first because ppmlhdfe depends on them.
** esttab is part of estout; the plotplain graph scheme is part of blindschemes.
ssc install ftools, replace
ssc install reghdfe, replace
ssc install ppmlhdfe, replace
ssc install tuples, replace
ssc install estout, replace
ssc install outreg2, replace
ssc install combomarginsplot, replace
ssc install blindschemes, replace
* NOTE(review): genl, used in section 1, is not an official Stata command and
* is not installed here; confirm where it comes from and add it if needed.

**********************************************
*** 1. ASSEMBLE DATA

* Load the 5-year dyad-period data. clear is the documented option of use for
* discarding the data currently in memory.
use "working_5yr.dta", clear

*Variable transformations
* NOTE(review): genl is not an official Stata command; presumably a
* user-written wrapper around generate -- confirm it is installed.
* The two migrant-stock variables are logged after adding 1.
genl lndistw=log(distw)
genl lnUNstockfromDinO=log(UNstockfromDinO+1)
genl lnUNstockfromOinD=log(UNstockfromOinD+1)

* Negative values of instock are recoded to 0.1 before taking log(instock+1)
replace instock=.1 if instock<0
gen lninstock=log(instock+1)

* Lower-case the names of the OECD membership indicators
rename OECD_o oecd_o
rename OECD_d oecd_d



*Dependent Variables
* Cap connected at 1: any non-system-missing value above 1 becomes 1
replace connected = 1 if connected != . & connected > 1

* Both outcome variables are set to missing in period 1
foreach outcome of varlist connected connections {
	replace `outcome' = . if period == 1
}

* Declare the panel structure (unit: dyad, time: period)
tsset dyad period

* Two alternative sets of value labels for period; yearsmig is attached here
label define yearsmig ///
	1 "1: 1990-1994" ///
	2 "2: 1995-1999" ///
	3 "3: 2000-2004" ///
	4 "4: 2005-2009" ///
	5 "5: 2010-2014", replace
label define yearsbank ///
	1 "90-94" ///
	2 "95-99" ///
	3 "00-04" ///
	4 "05-09" ///
	5 "10-13", replace
label values period yearsmig

*Create an indicator for dyads for descriptive statistics
* Remove Chile, Israel, Korea and Mexico from OECD since they join during the time period under study
* Israel's ISO 3166-1 alpha-3 code is ISR. The code ISL used previously is
* Iceland, so Iceland was removed from the OECD group and Israel was not.
* NOTE(review): this correction changes the oecd_o/oecd_d values for Iceland
* and Israel, so subsample results may differ from earlier runs.
foreach side in o d {
	replace oecd_`side' = 0 if inlist(iso_`side', "CHL", "ISR", "KOR", "MEX")
}

*Create indicator for dyad OECD membership
* Coding by (oecd_o, oecd_d): (1,1)=1, (0,0)=2, (0,1)=3, (1,0)=4.
* Any other combination (e.g. a missing flag) leaves oecdlink missing.
gen oecdlink = cond(oecd_o==1 & oecd_d==1, 1, ///
               cond(oecd_o==0 & oecd_d==0, 2, ///
               cond(oecd_o==0 & oecd_d==1, 3, ///
               cond(oecd_o==1 & oecd_d==0, 4, .))))
label define northsouth1 ///
	1 "North-North" ///
	2 "South-South" ///
	3 "South-North" ///
	4 "North-South"
label values oecdlink northsouth1

*Relative per-capita GDP: computed as a ratio (the _d value over the _o value)
genl gdppc_constant_diff=gdppc_constant_d/gdppc_constant_o

*Indicator if both jurisdictions are offshore
* 1 when both flags equal 1, 0 for any other pair of non-system-missing
* flags, and missing when either flag is system missing
gen dyad_offshore = (cvh_offshore_d==1 & cvh_offshore_o==1) if cvh_offshore_d!=. & cvh_offshore_o!=.


*Relative regulatory quality: both scores are shifted up by 2.5, then divided
* NOTE(review): the shift presumably makes the scores positive so the ratio
* is well behaved -- confirm against the range of rqe_o and rqe_d.
foreach score of varlist rqe_o rqe_d {
	replace `score' = `score' + 2.5
}
genl rqe_diff=rqe_d/rqe_o

*Rescale trade by a factor of 100, then log after adding 1
replace trade_UN = trade_UN * 100
genl lntrade_UN=log(trade_UN+1)

*Round connections variable in order to run poisson models
* NOTE(review): the models visible in this file use connections, not rconnections
gen rconnections = round(connections)

*Create dummies for host*period and home*period
* For each side: encode the string ISO code to a numeric id, then build a
* group id for every (country, period) pair. push is built from code_o and
* pull from code_d.
local fe_o push
local fe_d pull
foreach side in o d {
	encode iso_`side', gen(code_`side')
	egen `fe_`side'' = group(code_`side' period)
}


*Drop exclusively offshore dyads
drop if dyad_offshore == 1

*Set data up for analysis
* Re-declare the panel after dropping observations, then session settings
tsset dyad period
set matsize 11000
set scheme s2mono

**********************************************
*** 2. ANALYSIS USING 5-YEAR PERIOD BANKING INVESTMENT DATA

*** 2A) Main Models

* Structure of this section: five pairs of models, one pair for the full
* sample and one for each value of oecdlink (1 to 4). In each pair:
*   (1) ppmlhdfe on connections with all regressors lagged one period,
*       push and pull absorbed, errors clustered by code_o and code_d;
*   (2) reg on connected, restricted with if e(sample) to the observations
*       used by the ppmlhdfe call immediately before it, clustered by dyad.
* Statement order matters: each if e(sample) refers to the most recent
* estimation command. Each margins call saves its results to <name>.dta and
* each marginsplot saves its graph to <name>.gph; both are used for the
* figures built later in the file.
* NOTE(review): d(name) is understood to store the sum of the absorbed fixed
* effects in a new variable so that margins can be computed -- confirm in the
* ppmlhdfe help file.

*DV: Count of banks: Table 1, Column 1
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff   trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) , cl(code_o code_d) abs(push pull) d(AA)
est store AA
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileAA,replace)
marginsplot, title("All Corridors") saving(fileAA,replace)

*DV: Any banking investment, for Table A3 and Figure 5
* Full sample. This is the only reg in the section that writes explicit i./c.
* prefixes on some regressors and evaluates margins over 0(1)14 instead of 0(1)16.
reg connected l.(i.connected c.lndistw i.contig i.comlang_off colony comcur comrel comleg rqe_diff  c.trade_UN  c.gdppc_constant_diff c.lnUNstockfromOinD c.lninstock) i.push i.pull if e(sample) , cl(dyad) 
est store A
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)14)) saving(fileA, replace) 

*DV: Count of banks: Table 1, Column 2
* Subsample oecdlink==1 (labelled North-North)
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock)   if  oecdlink==1 , cl(code_o code_d) abs(push pull) d(BB)
est store BB
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileBB,replace)
marginsplot, title("North->North") saving(fileBB,replace)

*DV: Any banking investment, for Table A3 and Figure 5
reg connected l.(connected lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) i.push i.pull if e(sample) , cl(dyad) 
est store B
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileB, replace) 

*DV: Count of banks: Table 1, Column 3
* Subsample oecdlink==2 (labelled South-South)
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock)  if oecdlink==2 , cl(code_o code_d) abs(push pull) d(CC)
est store CC
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)15)) saving(fileCC,replace)
marginsplot, title("South->South") saving(fileCC,replace)

*DV: Any banking investment, for Table A3 and Figure 5
reg connected l.(connected lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) i.push i.pull if e(sample) , cl(dyad) 
est store C
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileC, replace) 

*DV: Count of banks: Table 1, Column 4
* Subsample oecdlink==3 (labelled South-North)
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock)  if  oecdlink==3 , cl(code_o code_d) abs(push pull) d(DD)
est store DD
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)12)) saving(fileDD,replace)
marginsplot, title("South->North") saving(fileDD,replace)

*DV: Any banking investment, for Table A3 and Figure 5
reg connected l.(connected lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) i.push i.pull  if  e(sample) , cl(dyad)
est store D
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileD, replace) 

*DV: Count of banks: Table 1, Column 5
* Subsample oecdlink==4 (labelled North-South)
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock)  if  oecdlink==4 , cl(code_o code_d) abs(push pull) d(EE)
est store EE
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)13)) saving(fileEE, replace) 
marginsplot, title("North->South") saving(fileEE,replace)

*DV: Any banking investment, for Table A3 and Figure 5
reg connected l.(connected lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) i.push i.pull if e(sample) , cl(dyad) 
est store E
margins if e(sample), at(l.lnUNstockfromOinD=(0(1)16)) saving(fileE, replace) 


* Variable labels picked up by the label option of esttab in the tables below.
* lnUNstockfromOinD used to be labelled twice in a row; only the second label
* ("Ln(Migrants)") ever took effect, so the overwritten first one is removed.
label var connected "Connected"
label var connections "Connections"
label var lnUNstockfromOinD "Ln(Migrants)"
label var trade_UN "Bilateral trade"
label var lninstock "Ln(Bilateral FDI)"
label var gdppc_constant_diff "GDP ratio"
label var lndistw "Ln(Distance)"
label var contig "Common Border"
label var comlang_off "Common Language"
label var colony "Common colony"
label var comcur "Common Currency"
label var comrel "Common Religion"
label var rqe_diff "Diff Reg Quality"

* Graph scheme for the figures that follow
set scheme s1mono

* Make Table 1
* Count models for the full sample and the four oecdlink subsamples
esttab AA BB CC DD EE using table1.rtf, ///
	replace se label nobaselevels ///
	star(* 0.10 ** 0.05 *** 0.01)

* Make Table A3 (Supplementary Material)
* Models of connected; the push and pull dummies are dropped from the table
esttab A B C D E using tableA3.rtf, ///
	replace se label nobaselevels ///
	drop(*push* *pull*) ///
	star(* 0.10 ** 0.05 *** 0.01)

* Make Figure 4
* Overlay of the saved margins from the five count models, without CIs
combomarginsplot fileAA fileBB fileCC fileDD fileEE, ///
	labels("Full Sample" "North->North" "South->South" "South->North" "North->South") ///
	legend(on) noci
graph export figure4.eps, replace

* Make Figure 5
* Overlay of the saved margins from the five models of connected, without CIs
combomarginsplot fileA fileB fileC fileD fileE, ///
	labels("Full Sample" "North->North" "South->South" "South->North" "North->South") ///
	legend(on) noci
graph export figure5.eps, replace

* Make Figure A1
* Panel of the five marginsplot graphs saved to disk
graph combine "fileAA" "fileBB" "fileCC" "fileDD" "fileEE"
graph export figurea1.eps, replace

* Make Figure A2
* Same as Figure 5 but with confidence intervals (noci omitted)
combomarginsplot fileA fileB fileC fileD fileE, ///
	labels("Full Sample" "North->North" "South->South" "South->North" "North->South") ///
	legend(on)
graph export figurea2.eps, replace

* Erase unnecessary files
* Count models left both a margins dataset and a saved graph
foreach stem in fileAA fileBB fileCC fileDD fileEE {
	erase "`stem'.dta"
	erase "`stem'.gph"
}
* Models of connected left only a margins dataset
foreach stem in fileA fileB fileC fileD fileE {
	erase "`stem'.dta"
}


*** 2B) Discovery of new markets
* Pattern used twice below: a quiet ppmlhdfe run is used only to define the
* estimation sample, then a logit with explicit i.push and i.pull dummies is
* fitted on that sample via if e(sample). Statement order matters because
* e(sample) always refers to the most recent estimation command.
set scheme plotplain

* DV: Binary measure of investment: Table 3, column 1
* First, define sample 
qui ppmlhdfe connected l.(c.lnUNstockfromOinD##c.connected lndistw contig comlang_off  colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lninstock),  abs(push pull) cl(code_o code_d)
* Then, run logit on that sample
* Lagged migrant stock is interacted with lagged connected; standard errors clustered by dyad
logit connected l.( c.lnUNstockfromOinD##connected lndistw contig comlang_off  colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff lninstock) i.push i.pull if e(sample), cl(dyad) nolog
est store Q
* Margins over lagged migrant stock, separately for observations with lagged
* connected equal to 0 (file1.gph) and equal to 1 (file2.gph); both graphs are
* combined into Figure 6 later in the file
margins if e(sample) & l.connected==0, at(l.lnUNstockfromOinD=(0(1)9)) 
marginsplot, saving(file1.gph, replace)
margins if e(sample) & l.connected==1, at(l.lnUNstockfromOinD=(0(1)9))
marginsplot, saving(file2.gph, replace)

* DV: Binary measure of greenfield investment: Table 3, column 2
* Unlike the models above, neither greenfield model includes comrel
*First, define sample
qui ppmlhdfe greenfield l.( c.lnUNstockfromOinD greenfield lndistw contig comlang_off  colony comcur  comleg rqe_diff  trade_UN   gdppc_constant_diff lninstock),  abs(push pull) cl(code_o code_d)
* Then, run logit on that sample
logit greenfield l.(greenfield lndistw contig comlang_off colony comcur  comleg rqe_diff  trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) i.push i.pull if e(sample), cl(dyad) nolog
est store QQ

*Make Table 3
* The two logit models; push and pull dummies are dropped from the table
esttab Q QQ using table3.rtf, ///
	replace se label nobaselevels ///
	drop(*push* *pull*) ///
	star(* 0.10 ** 0.05 *** 0.01)

* Make Figure 6
* Side-by-side panel of the two saved marginsplot graphs
graph combine file1.gph file2.gph
graph export figure6.eps, replace

*Erase unnecessary files
foreach graphfile in file1.gph file2.gph {
	erase "`graphfile'"
}

**********************************************
*** 3. SUMMARY


** Make Figure 1
* Switch period to the yearsbank labels, then plot the mean of connected by
* period within each oecdlink group, excluding period 1
label values period yearsbank
graph bar connected if period>1, ///
	over(period, label(labsize(vsmall))) over(oecdlink) ///
	scheme(s2color) graphregion(color(white)) ///
	ytitle("Share of dyads with banking investment") ///
	blabel(total, format(%9.2f)) intensity(50)
graph export figure1.eps, replace


* Variables reported in both summary tables, in display order
local sumvars connections connected lnUNstockfromOinD lninstock trade_UN gdppc_constant_diff rqe_diff comcur comlang_off lndistw contig colony comleg comrel

** Make Table A1
* Destructive step: observations with missing connected are dropped from memory
keep if connected!=.
outreg2 using tablea1.rtf, replace sum(log) keep(`sumvars') sortvar(`sumvars')

** Make Table A2
* Subset data used in main models
* The main count model is re-run quietly only to recover its estimation sample
qui ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff trade_UN  gdppc_constant_diff lnUNstockfromOinD lninstock) , cl(code_o code_d) abs(push pull)
keep if e(sample)
outreg2 using tablea2.rtf, replace sum(log) keep(`sumvars') sortvar(`sumvars')

* Erase unnecessary files
foreach txtfile in tablea1.txt tablea2.txt {
	erase "`txtfile'"
}

**********************************************
*** 4. ANALYSIS USING ANNUAL BANKING INVESTMENT DATA

* Load the annual panel. clear is the documented option of use for discarding
* the data in memory.
use "working_panel.dta", clear
* NOTE(review): the l. operators below need the data to be tsset/xtset and no
* tsset is issued after loading; this presumably relies on panel settings
* saved inside working_panel.dta, which must also already contain push and
* pull -- confirm.

*** 4A) Regularization and Naturalization extension
* Regularization
* regul enters unlagged; push, iso_d and year are absorbed
ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff  lninstock) regul , cl(dyad) abs(push iso_d year)
* Re-uses the stored-estimate name Q from section 2B (Table 3 is already written)
est store Q
* Naturalizations
* lnnat enters lagged; push and pull are absorbed
ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff  lninstock lnnat)  , cl(dyad) abs(push pull)
est store R

** Make Table 2
* Stars are reported at the 0.10 and 0.05 levels only
esttab Q R using table2.rtf,  replace se star(* 0.10 ** 0.05 )  label


*** 4B) Foreign students
* T: all observations; U: only observations with lnforeignstudent different from 0
ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff  lninstock lnforeignstudent)  , cl(dyad) abs(push pull)
est store T
ppmlhdfe connections l.(connections lndistw contig comlang_off colony comcur comrel comleg rqe_diff  trade_UN  gdppc_constant_diff  lninstock lnforeignstudent) if lnforeignstudent~=0 , cl(dyad) abs(push pull)
est store U

** Make Table A4
esttab T U using tablea4.rtf,  replace se star(* 0.10 ** 0.05 )  label

**********************************************
*** 5. ANALYSIS USING REMITTANCE DATA

* Load the remittance data, discarding the data in memory.
* All three models below share the same right-hand side: lnstock interacted
* with lagged connected, plus controls. They differ in the outcome and in how
* the code_o, code_d and year effects are handled.
* NOTE(review): the l. operator needs the data to be tsset/xtset and no tsset
* is issued here; presumably the settings are saved in remittances.dta -- confirm.
use "remittances.dta", clear

** Table A5, Column 1: OLS
* Outcome lnremit; code_o, code_d and year enter as explicit dummies.
* NOTE(review): this model clusters on pair while the Poisson model below
* clusters on dyad -- confirm both variables exist and that the difference is intended.
reg lnremit c.lnstock##c.l.connected lndist contig comlang_off comcol lngdpconstant* exchangerate_perusdollar_o inflation_consumerprices_o polity_score_o totaldisasters_o i.code_o i.code_d i.year, cl(pair)
* Re-uses the stored-estimate name A from section 2A
est store A

** Figure A3
* Predictions from the OLS model over lnstock 0 to 10, at lagged connected 0 and 1
margins if e(sample), at(lnstock=(0(1)10) l.connected=(0 1)) 
set scheme plotplain
marginsplot, plot( , label("Banking Investment = 0" "Banking Investment = 1"))
graph export figurea3.eps, replace


** Table A5, Column 2: Poisson
* Outcome remittances; code_o, code_d and year are absorbed
ppmlhdfe remittances c.lnstock##c.l.connected lndist contig comlang_off comcol  lngdpconstant* exchangerate_perusdollar_o inflation_consumerprices_o polity_score_o totaldisasters_o, cl(dyad) abs(code_o code_d year)
est store B

** Table A5, Column 3: Tobit
* Outcome remittances; no ll()/ul() censoring limits are specified; robust
* (not clustered) standard errors
tobit remittances c.lnstock##c.l.connected lndist contig comlang_off comcol  lngdpconstant* exchangerate_perusdollar_o inflation_consumerprices_o polity_score_o totaldisasters_o i.code_o i.code_d i.year, vce(robust)
est store C

** Make Table A5
* Coefficients whose names contain code or year are dropped from the table
esttab A B C using tablea5.rtf,  replace se star(* 0.10 ** 0.05 )  label nobaselevels drop(*code* *year*)

